#!/bin/bash

# script to convert a merged Hugging Face model to GGUF and quantize it with llama.cpp
# usage: adjust the hard-coded paths below to your environment, then run ./this_script.sh
# WARNING: paths are hard-coded for a local Windows setup; tune the quantization type yourself

# Convert the merged HF model to GGUF (f16).
# BUG FIX: the original had no line continuations, so "--outtype f16" and
# "--outfile ..." ran as separate (failing) shell commands instead of being
# passed as arguments to the python call.
# NOTE(review): --outfile now names the exact file the quantize step below
# consumes (convert_hf_to_gguf.py expects a file path, not a directory) —
# confirm the filename matches what the converter would have produced.
python G:/LLM/lover_llama/script/llama_cpp/llama.cpp/convert_hf_to_gguf.py \
    G:/LLM/lover_llama/script/training/merged_model \
    --outtype f16 \
    --outfile G:/LLM/lover_llama/script/llama_cpp/output_gguf/Merged_Model-qwen2-1.5B-F16.gguf \
  || exit 1   # no 'set -e' in this script: stop before quantizing a missing file

# Quantize the f16 GGUF down to q4_0.
# (llama-quantize.exe is located under llama_cpp\llama.cpp\build\bin\Release)
base_dir="G:/LLM/lover_llama/script/llama_cpp"
model_name="Merged_Model-qwen2-1.5B-F16"
llama-quantize.exe \
  "${base_dir}/output_gguf/${model_name}.gguf" \
  "${base_dir}/output_quant/${model_name}-q4_0.gguf" \
  q4_0